Vendor numpy median, percentile, and quantile reduction tests#567
Vendor numpy median, percentile, and quantile reduction tests#567brandon-b-miller wants to merge 30 commits intoNVIDIA:mainfrom
median, percentile, and quantile reduction tests#567Conversation
Co-authored-by: Phillip Cloud <417981+cpcloud@users.noreply.github.com>
|
Auto-sync is disabled for draft pull requests in this repository. Workflows must be run manually. Contributors can view more details about this message here. |
median, percentile, and quantile reductionsmedian, percentile, and quantile reduction tests
|
Auto-sync is disabled for ready for review pull requests in this repository. Workflows must be run manually. Contributors can view more details about this message here. |
|
/ok to test |
Greptile SummaryVendors comprehensive tests for Key changes:
Issues found:
Confidence Score: 3/5
Important Files Changed
|
There was a problem hiding this comment.
Additional Comments (1)
-
numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py, line 25-36 (link)logic: redundant config management -
NRTEnablingCUDATestCasealready managesself.old_nrt_settingandconfig.CUDA_ENABLE_NRTin its ownsetUp/tearDownthis creates duplicate state and could cause issues. remove the NRT-related lines:
7 files reviewed, 2 comments
| assert rettype.layout in "CF" | ||
| if arytype.layout == rettype.layout: | ||
| # Fast path: memcpy | ||
| cgutils.raw_memcpy( | ||
| builder, dest_data, src_data, ary.nitems, ary.itemsize, align=1 | ||
| ) | ||
|
|
||
| else: | ||
| src_strides = cgutils.unpack_tuple(builder, ary.strides) | ||
| dest_strides = cgutils.unpack_tuple(builder, ret.strides) | ||
| intp_t = context.get_value_type(types.intp) | ||
| src_strides = cgutils.unpack_tuple(builder, ary.strides) | ||
| dest_strides = cgutils.unpack_tuple(builder, ret.strides) | ||
| intp_t = context.get_value_type(types.intp) | ||
|
|
||
| with cgutils.loop_nest(builder, shapes, intp_t) as indices: | ||
| src_ptr = cgutils.get_item_pointer2( | ||
| context, | ||
| builder, | ||
| src_data, | ||
| shapes, | ||
| src_strides, | ||
| arytype.layout, | ||
| indices, | ||
| ) | ||
| dest_ptr = cgutils.get_item_pointer2( | ||
| context, | ||
| builder, | ||
| dest_data, | ||
| shapes, | ||
| dest_strides, | ||
| rettype.layout, | ||
| indices, | ||
| ) | ||
| builder.store(builder.load(src_ptr), dest_ptr) | ||
| with cgutils.loop_nest(builder, shapes, intp_t) as indices: | ||
| src_ptr = cgutils.get_item_pointer2( | ||
| context, | ||
| builder, | ||
| src_data, | ||
| shapes, | ||
| src_strides, | ||
| arytype.layout, | ||
| indices, | ||
| ) | ||
| dest_ptr = cgutils.get_item_pointer2( | ||
| context, | ||
| builder, | ||
| dest_data, | ||
| shapes, | ||
| dest_strides, | ||
| rettype.layout, | ||
| indices, | ||
| ) | ||
| builder.store(builder.load(src_ptr), dest_ptr) |
There was a problem hiding this comment.
logic: removed memcpy fast path optimization when source and destination layouts match - this causes performance regression for same-layout copies
the original code had:
if arytype.layout == rettype.layout:
# Fast path: memcpy
cgutils.raw_memcpy(builder, dest_data, src_data, ary.nitems, ary.itemsize, align=1)consider restoring the fast path for performance
Follow up #523